In [ ]:
!pip install numpy plotly pandas
Requirement already satisfied: numpy in c:\users\looper\appdata\local\programs\python\python311\lib\site-packages (1.26.2)
Requirement already satisfied: plotly in c:\users\looper\appdata\local\programs\python\python311\lib\site-packages (5.18.0)
Requirement already satisfied: pandas in c:\users\looper\appdata\local\programs\python\python311\lib\site-packages (2.1.4)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\looper\appdata\local\programs\python\python311\lib\site-packages (from plotly) (8.2.3)
Requirement already satisfied: packaging in c:\users\looper\appdata\local\programs\python\python311\lib\site-packages (from plotly) (23.2)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\looper\appdata\local\programs\python\python311\lib\site-packages (from pandas) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\looper\appdata\local\programs\python\python311\lib\site-packages (from pandas) (2023.3.post1)
Requirement already satisfied: tzdata>=2022.1 in c:\users\looper\appdata\local\programs\python\python311\lib\site-packages (from pandas) (2023.3)
Requirement already satisfied: six>=1.5 in c:\users\looper\appdata\local\programs\python\python311\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)
[notice] A new release of pip available: 22.3.1 -> 23.3.2
[notice] To update, run: C:\Users\Looper\AppData\Local\Programs\Python\Python311\python.exe -m pip install --upgrade pip
In [ ]:
import pandas as pd
import numpy as np
import plotly.express as px
# Read the Spotify dataset from the working directory and make `streams` numeric.
# errors='coerce' turns any value that cannot be parsed as a number into NaN.
data = (
    pd.read_csv("spotify-2023.csv")
    .assign(streams=lambda frame: pd.to_numeric(frame['streams'], errors='coerce'))
)
In [ ]:
# Most-streamed track for each release year.
#
# `streams` was coerced with errors='coerce', so unparsable values are NaN.
# A release year whose rows are all NaN would make idxmax return NaN or raise
# (depending on the pandas version), and the .loc lookup below would then fail.
# Dropping NaN rows first guarantees every remaining group has a real maximum.
valid_streams = data.dropna(subset=['streams'])
top_row_labels = valid_streams.groupby('released_year')['streams'].idxmax()
most_streamed = data.loc[top_row_labels]

# Keep only the columns needed to identify and plot each winning track.
clean_data = most_streamed[['track_name', 'artist(s)_name', 'released_year', 'streams']]

# One bar per release year; track and artist are surfaced on hover so the
# figure says which song each bar stands for.
fig = px.bar(
    clean_data,
    x='released_year',
    y='streams',
    title='Most-streamed track per release year',
    hover_data=['track_name', 'artist(s)_name'],
)

fig.show(renderer='notebook')
In [ ]:
# Three most-streamed tracks for every release year from 2010 onwards.
#
# Filtering on the year first is equivalent to filtering afterwards (the filter
# is on the grouping key) and avoids ranking years that are discarded anyway.
# sort_values + groupby.head(3) is the built-in replacement for
# groupby.apply(lambda g: g.nlargest(3, 'streams')); it keeps rows ordered by
# year, then by streams descending, with NaN streams placed last.
top_songs = (
    data.loc[
        data['released_year'] >= 2010,
        ['track_name', 'artist(s)_name', 'released_year', 'streams'],
    ]
    .sort_values(['released_year', 'streams'], ascending=[True, False])
    .groupby('released_year')
    .head(3)
    .reset_index(drop=True)
)

# Stacked bars per year, coloured by stream count; hover shows track and artist.
fig = px.bar(
    top_songs,
    x='released_year',
    y='streams',
    title='top_songs',
    hover_data=['track_name', 'artist(s)_name'],
    color_continuous_scale="Thermal",
    color="streams",
)

fig.show(renderer='notebook')
In [ ]:
# Narrow the dataset to the columns used for the per-key analysis.
song_keys = data.loc[:, ["released_year", "key", "streams"]]

# Total streams for each musical key, returned as a two-column frame (key, streams).
group = (
    song_keys
    .groupby('key')['streams']
    .agg('sum')
    .reset_index()
)

def to_millions(x):
    """Return ``x`` divided by one million (1e6).

    Works for anything that supports division by a float, e.g. a plain
    number or an array-like of numbers.
    """
    millions = x / 1_000_000.0
    return millions